0.1 Import Data

if (!require("pacman")) install.packages("pacman")
pacman::p_load(tidyverse, broom, skimr, devtools, ggpubr, mgcv, extrafont, mgcViz, here)

theme_set(theme_pubclean(base_size = 14))
# import
lesion <- read_csv(here("cache", "summarised_lesion_data.csv"))
weather <- read_csv(here("cache", "weather_summary.csv"))

dat <- left_join(lesion, weather, by = c("site" = "Location", "rep" = "Rep"))

0.2 Fit GAMs

For reproducibility purposes, use set.seed().

set.seed(27)

0.2.1 mod1 - s(Distance)

mod1 <-
   gam(
      mean_pot_count ~ s(distance, k = 5),
      data = dat
   )

summary(mod1)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## mean_pot_count ~ s(distance, k = 5)
## 
## Parametric coefficients:
##             Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)   1.0919     0.0592    18.4 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##              edf Ref.df    F             p-value    
## s(distance) 3.84   3.98 45.2 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.435   Deviance explained = 44.4%
## GCV = 0.82474  Scale est. = 0.80738   n = 230

0.2.2 mod2 - s(Distance) + Precipitation

mod2 <-
   gam(
      mean_pot_count ~ sum_rain+ s(distance, k = 5),
      data = dat
   )

summary(mod2)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## mean_pot_count ~ sum_rain + s(distance, k = 5)
## 
## Parametric coefficients:
##             Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)   1.1685     0.0746   15.67 <0.0000000000000002 ***
## sum_rain     -0.0731     0.0435   -1.68               0.094 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##              edf Ref.df    F             p-value    
## s(distance) 3.83   3.98 45.5 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.439   Deviance explained = 45.1%
## GCV = 0.82197  Scale est. = 0.80113   n = 230

0.2.3 mod3 - s(Distance) + Windspeed

mod3 <-
   gam(mean_pot_count ~ mws + s(distance, k = 5),
       data = dat)

summary(mod3)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## mean_pot_count ~ mws + s(distance, k = 5)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.5928     0.1510    3.93  0.00012 ***
## mws           0.1383     0.0387    3.58  0.00043 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##              edf Ref.df    F             p-value    
## s(distance) 3.85   3.98 48.5 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.463   Deviance explained = 47.4%
## GCV = 0.78717  Scale est. = 0.76714   n = 230

0.2.4 mod4 - s(Distance) + Windspeed + Precipitation

mod4 <-
   gam(mean_pot_count ~ sum_rain + mws + s(distance, k = 5),
       data = dat)

summary(mod4)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## mean_pot_count ~ sum_rain + mws + s(distance, k = 5)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.6641     0.1547    4.29 0.000026 ***
## sum_rain     -0.0811     0.0424   -1.91  0.05694 .  
## mws           0.1421     0.0385    3.69  0.00028 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##              edf Ref.df  F             p-value    
## s(distance) 3.84   3.98 49 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.469   Deviance explained = 48.3%
## GCV = 0.78164  Scale est. = 0.75838   n = 230

0.2.5 mod5 - s(Distance + Windspeed) + Precipitation

mod5 <-
   gam(
      mean_pot_count ~ sum_rain + s(distance + mws, k = 5),
      data = dat
   )
## Warning in term[i] <- attr(terms(reformulate(term[i])), "term.labels"): number
## of items to replace is not a multiple of replacement length
summary(mod5)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## mean_pot_count ~ sum_rain + s(distance + mws, k = 5)
## 
## Parametric coefficients:
##             Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)   1.1685     0.0746   15.67 <0.0000000000000002 ***
## sum_rain     -0.0731     0.0435   -1.68               0.094 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##              edf Ref.df    F             p-value    
## s(distance) 3.83   3.98 45.5 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.439   Deviance explained = 45.1%
## GCV = 0.82197  Scale est. = 0.80113   n = 230

0.2.6 mod6 - s(Distance) + s(Windspeed) + Precipitation

mod6 <-
   gam(
      mean_pot_count ~ sum_rain + s(distance, k = 5) + s(mws, k = 5),
      data = dat
   )

summary(mod6)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## mean_pot_count ~ sum_rain + s(distance, k = 5) + s(mws, k = 5)
## 
## Parametric coefficients:
##             Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)   1.0351     0.0788   13.13 <0.0000000000000002 ***
## sum_rain      0.0542     0.0553    0.98                0.33    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##              edf Ref.df    F              p-value    
## s(distance) 3.88   3.99 54.5 < 0.0000000000000002 ***
## s(mws)      3.95   4.00 12.8         0.0000000014 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =   0.54   Deviance explained = 55.8%
## GCV = 0.68596  Scale est. = 0.65664   n = 230

0.2.7 mod7 - s(Distance) + s(Windspeed)

mod7 <-
   gam(
      mean_pot_count ~ s(distance, k = 5) + s(mws, k = 5),
      data = dat
   )

summary(mod7)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## mean_pot_count ~ s(distance, k = 5) + s(mws, k = 5)
## 
## Parametric coefficients:
##             Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)   1.0919     0.0534    20.4 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##              edf Ref.df    F              p-value    
## s(distance) 3.88   3.99 54.7 < 0.0000000000000002 ***
## s(mws)      3.95   4.00 13.4        0.00000000051 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =   0.54   Deviance explained = 55.6%
## GCV = 0.68285  Scale est. = 0.65663   n = 230

0.2.8 mod8 - s(Distance) + s(Windspeed) + s(Precipitation)

mod8 <-
   gam(
      mean_pot_count ~ s(distance, k = 5) + s(mws, k = 5) + s(sum_rain, k = 5),
      data = dat
   )

summary(mod8)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## mean_pot_count ~ s(distance, k = 5) + s(mws, k = 5) + s(sum_rain, 
##     k = 5)
## 
## Parametric coefficients:
##             Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)   1.0919     0.0534    20.4 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##              edf Ref.df     F             p-value    
## s(distance) 3.88   3.99 54.78 <0.0000000000000002 ***
## s(mws)      2.56   2.74  7.92              0.0038 ** 
## s(sum_rain) 2.14   2.21  5.63              0.0018 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =   0.54   Deviance explained = 55.8%
## GCV = 0.68522  Scale est. = 0.65666   n = 230

0.2.9 mod9 - s(Distance) + s(Precipitation)

mod9 <-
   gam(
      mean_pot_count ~ s(distance, k = 5) + s(sum_rain, k = 5),
      data = dat
   )

summary(mod9)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## mean_pot_count ~ s(distance, k = 5) + s(sum_rain, k = 5)
## 
## Parametric coefficients:
##             Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)   1.0919     0.0546      20 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##              edf Ref.df    F              p-value    
## s(distance) 3.87   3.99 53.3 < 0.0000000000000002 ***
## s(sum_rain) 3.87   3.99 11.3          0.000000097 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =   0.52   Deviance explained = 53.6%
## GCV = 0.71262  Scale est. = 0.68552   n = 230

0.2.10 mod10 - s(Distance) +s(Precipitation) + Windspeed

mod10 <-
   gam(
      mean_pot_count ~ s(distance, k = 5) + s(sum_rain, k = 5) + mws,
      data = dat
   )

summary(mod10)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## mean_pot_count ~ s(distance, k = 5) + s(sum_rain, k = 5) + mws
## 
## Parametric coefficients:
##             Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)   0.4298     0.1559    2.76    0.0063 ** 
## mws           0.1834     0.0405    4.53 0.0000097 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##              edf Ref.df    F              p-value    
## s(distance) 3.88   3.99 54.5 < 0.0000000000000002 ***
## s(sum_rain) 2.08   2.27 13.2            0.0000012 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.529   Deviance explained = 54.3%
## GCV = 0.69678  Scale est. = 0.67267   n = 230

0.2.11 mod11.0 - s(Distance) + s(Windspeed) + s(Precipitation), family = tw()

This is the same as mod8 but using family = tw(), see ?family.mgcv for more on the families. The Tweedie distribution is used where the distribution has a positive mass at zero, but is continuous unlike the Poisson distribution that requires count data. The data visualisation shows clearly that the mean pot count data have this shape.

mod11.0 <-
   gam(
      mean_pot_count ~ s(distance, k = 5) + 
         s(mws, k = 5) + 
         s(sum_rain, k = 5),
      data = dat,
      family = tw()
   )

summary(mod11.0)
## 
## Family: Tweedie(p=1.047) 
## Link function: log 
## 
## Formula:
## mean_pot_count ~ s(distance, k = 5) + s(mws, k = 5) + s(sum_rain, 
##     k = 5)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -0.2053     0.0488    -4.2 0.000038 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##              edf Ref.df     F              p-value    
## s(distance) 3.17   3.63 79.02 < 0.0000000000000002 ***
## s(mws)      2.09   2.32  7.36              0.00025 ***
## s(sum_rain) 2.29   2.45 10.52             0.000011 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.651   Deviance explained = 61.5%
## -REML = 210.63  Scale est. = 0.36523   n = 230

Try fitting the same model using a different shrinkage smoother, ts is thin- plate splines.

0.2.12 mod11.1 - s(Distance, bs = “ts”) + s(Windspeed, bs = “ts”) + s(Precipitation, bs = “ts”), family = tw()

mod11.1 <-
   gam(
      mean_pot_count ~ s(distance, k = 5, bs = "ts") + 
         s(mws, k = 5, bs = "ts") + 
         s(sum_rain, k = 5, bs = "ts"),
      data = dat,
      family = tw()
   )

summary(mod11.1)
## 
## Family: Tweedie(p=1.048) 
## Link function: log 
## 
## Formula:
## mean_pot_count ~ s(distance, k = 5, bs = "ts") + s(mws, k = 5, 
##     bs = "ts") + s(sum_rain, k = 5, bs = "ts")
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -0.1998     0.0488    -4.1 0.000059 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##              edf Ref.df     F              p-value    
## s(distance) 2.74      4 71.05 < 0.0000000000000002 ***
## s(mws)      1.06      4  7.31        0.00000000035 ***
## s(sum_rain) 2.16      4 11.84        0.00000000004 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.638   Deviance explained = 60.3%
## -REML = 218.69  Scale est. = 0.36646   n = 230

This model, same structure as mod11.0, uses thin-plate splines to shrink the coefficients of the smooth to zero.

0.2.13 mod12 - s(Distance) + s(Precipitation) + Windspeed, family = tw()

Looking at mod11.1, mws is not significant as a smoothed term, and we’d suspect it was linear anyway, so add it as a linear term only.

mod12 <-
   gam(
      mean_pot_count ~ s(distance, k = 5) + s(sum_rain, k = 5) + mws,
      data = dat,
      family = tw()
   )

summary(mod12)
## 
## Family: Tweedie(p=1.048) 
## Link function: log 
## 
## Formula:
## mean_pot_count ~ s(distance, k = 5) + s(sum_rain, k = 5) + mws
## 
## Parametric coefficients:
##             Estimate Std. Error t value     Pr(>|t|)    
## (Intercept)  -0.7748     0.1293   -5.99 0.0000000083 ***
## mws           0.1586     0.0314    5.05 0.0000009032 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##              edf Ref.df    F              p-value    
## s(distance) 3.15   3.62 79.9 < 0.0000000000000002 ***
## s(sum_rain) 2.29   2.54 20.6          0.000000061 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.643   Deviance explained = 60.4%
## -REML = 212.62  Scale est. = 0.36632   n = 230

0.3 Compare the Models

0.3.1 AIC, BIC

models <- list(mod1 = mod1,
               mod2 = mod2,
               mod3 = mod3,
               mod4 = mod4,
               mod5 = mod5,
               mod6 = mod6,
               mod7 = mod7,
               mod8 = mod8,
               mod9 = mod9,
               mod10 = mod10,
               mod11.0 = mod11.0,
               mod11.1 = mod11.1,
               mod12 = mod12
               )
map_df(models, glance, .id = "model") %>%
   arrange(AIC)
## # A tibble: 13 x 7
##    model      df logLik   AIC   BIC deviance df.residual
##    <chr>   <dbl>  <dbl> <dbl> <dbl>    <dbl>       <dbl>
##  1 mod11.0  8.54  -200.  422.  462.     91.4        221.
##  2 mod12    7.45  -208.  436.  471.     94.0        223.
##  3 mod11.1  6.96  -209.  438.  471.     94.4        223.
##  4 mod7     8.83  -273.  567.  600.    145.         221.
##  5 mod8     9.59  -273.  567.  604.    145.         220.
##  6 mod6     9.83  -273.  568.  605.    145.         220.
##  7 mod10    7.96  -277.  571.  602.    149.         222.
##  8 mod9     8.74  -278.  576.  610.    152.         221.
##  9 mod4     6.84  -291.  598.  625.    169.         223.
## 10 mod3     5.85  -293.  600.  623.    172.         224.
## 11 mod2     5.83  -298.  609.  633.    180.         224.
## 12 mod5     5.83  -298.  609.  633.    180.         224.
## 13 mod1     4.84  -299.  610.  630.    182.         225.

0.3.2 R2

enframe(c(
   mod1 = summary(mod1)$r.sq,
   mod2 = summary(mod2)$r.sq,
   mod3 = summary(mod3)$r.sq,
   mod4 = summary(mod4)$r.sq,
   mod5 = summary(mod5)$r.sq,
   mod6 = summary(mod6)$r.sq,
   mod7 = summary(mod7)$r.sq,
   mod8 = summary(mod8)$r.sq,
   mod9 = summary(mod9)$r.sq,
   mod10 = summary(mod10)$r.sq,
   mod11.0 = summary(mod11.0)$r.sq,
   mod11.1 = summary(mod11.1)$r.sq,
   mod12 = summary(mod12)$r.sq
)) %>%
   arrange(desc(value))
## # A tibble: 13 x 2
##    name    value
##    <chr>   <dbl>
##  1 mod11.0 0.651
##  2 mod12   0.643
##  3 mod11.1 0.638
##  4 mod7    0.540
##  5 mod6    0.540
##  6 mod8    0.540
##  7 mod10   0.529
##  8 mod9    0.520
##  9 mod4    0.469
## 10 mod3    0.463
## 11 mod2    0.439
## 12 mod5    0.439
## 13 mod1    0.435

0.3.3 Coefficients

cat("\nmod1\n")
## 
## mod1
coef(mod1)
##   (Intercept) s(distance).1 s(distance).2 s(distance).3 s(distance).4 
##        1.0919      -10.6886        5.6207       -0.9475       -1.9453
cat("\nmod2\n")
## 
## mod2
coef(mod2)
##   (Intercept)      sum_rain s(distance).1 s(distance).2 s(distance).3 
##       1.16847      -0.07309     -10.28296       5.32884      -0.92761 
## s(distance).4 
##      -1.92327
cat("\nmod3\n")
## 
## mod3
coef(mod3)
##   (Intercept)           mws s(distance).1 s(distance).2 s(distance).3 
##        0.5928        0.1383      -10.8580        5.7526       -0.9273 
## s(distance).4 
##       -1.9768
cat("\nmod4\n")
## 
## mod4
coef(mod4)
##   (Intercept)      sum_rain           mws s(distance).1 s(distance).2 
##       0.66411      -0.08111       0.14208     -10.41768       5.43580 
## s(distance).3 s(distance).4 
##      -0.90480      -1.95363
cat("\nmod5\n")
## 
## mod5
coef(mod5)
##   (Intercept)      sum_rain s(distance).1 s(distance).2 s(distance).3 
##       1.16847      -0.07309     -10.28296       5.32884      -0.92761 
## s(distance).4 
##      -1.92327
cat("\nmod6\n")
## 
## mod6
coef(mod6)
##   (Intercept)      sum_rain s(distance).1 s(distance).2 s(distance).3 
##       1.03513       0.05417     -11.53555       6.18414      -0.91012 
## s(distance).4      s(mws).1      s(mws).2      s(mws).3      s(mws).4 
##      -2.04129       3.58007       8.88574       5.67541       1.30559
cat("\nmod7\n")
## 
## mod7
coef(mod7)
##   (Intercept) s(distance).1 s(distance).2 s(distance).3 s(distance).4 
##        1.0919      -11.3110        6.0268       -0.8972       -2.0312 
##      s(mws).1      s(mws).2      s(mws).3      s(mws).4 
##        3.4550        8.9379        5.8148        1.2085
cat("\nmod8\n")
## 
## mod8
coef(mod8)
##   (Intercept) s(distance).1 s(distance).2 s(distance).3 s(distance).4 
##        1.0919      -11.5274        6.1781       -0.9076       -2.0443 
##      s(mws).1      s(mws).2      s(mws).3      s(mws).4 s(sum_rain).1 
##        0.7218        0.3406        0.7937        0.8430       -0.5327 
## s(sum_rain).2 s(sum_rain).3 s(sum_rain).4 
##       -0.3428       -1.5569       -0.5915
cat("\nmod9\n")
## 
## mod9
coef(mod9)
##   (Intercept) s(distance).1 s(distance).2 s(distance).3 s(distance).4 
##        1.0919      -11.3267        6.0356       -0.9145       -2.0315 
## s(sum_rain).1 s(sum_rain).2 s(sum_rain).3 s(sum_rain).4 
##       48.5288      -28.2681       16.9986        1.4408
cat("\nmod10\n")
## 
## mod10
coef(mod10)
##   (Intercept)           mws s(distance).1 s(distance).2 s(distance).3 
##        0.4298        0.1834      -11.3882        6.0817       -0.9120 
## s(distance).4 s(sum_rain).1 s(sum_rain).2 s(sum_rain).3 s(sum_rain).4 
##       -2.0375       -0.4626        0.1845       -1.2396       -0.3232
cat("\nmod11.0\n")
## 
## mod11.0
coef(mod11.0)
##   (Intercept) s(distance).1 s(distance).2 s(distance).3 s(distance).4 
##       -0.2053       -2.5038        1.0412       -0.2882       -1.0477 
##      s(mws).1      s(mws).2      s(mws).3      s(mws).4 s(sum_rain).1 
##        0.2746        0.1912        0.5692        0.4779       -0.7130 
## s(sum_rain).2 s(sum_rain).3 s(sum_rain).4 
##       -0.2235       -1.5702       -0.5669
cat("\nmod11.1\n")
## 
## mod11.1
coef(mod11.1)
##   (Intercept) s(distance).1 s(distance).2 s(distance).3 s(distance).4 
##      -0.19984      -1.46252       0.36842      -0.28290      -0.94715 
##      s(mws).1      s(mws).2      s(mws).3      s(mws).4 s(sum_rain).1 
##       0.01161       0.01202       0.03989       0.23732      -0.52269 
## s(sum_rain).2 s(sum_rain).3 s(sum_rain).4 
##       0.32671      -1.38663      -0.30584
cat("\nmod12\n")
## 
## mod12
coef(mod12)
##   (Intercept)           mws s(distance).1 s(distance).2 s(distance).3 
##       -0.7748        0.1586       -2.4549        1.0081       -0.2937 
## s(distance).4 s(sum_rain).1 s(sum_rain).2 s(sum_rain).3 s(sum_rain).4 
##       -1.0442       -1.2567        0.6562       -1.6350       -0.3442

0.3.4 ANOVA

anova(mod1,
      mod2,
      mod3,
      mod4,
      mod5,
      mod6,
      mod7,
      mod8,
      mod9,
      mod10,
      mod11.0,
      mod11.1,
      mod12,
      test = "F")
## Analysis of Deviance Table
## 
## Model  1: mean_pot_count ~ s(distance, k = 5)
## Model  2: mean_pot_count ~ sum_rain + s(distance, k = 5)
## Model  3: mean_pot_count ~ mws + s(distance, k = 5)
## Model  4: mean_pot_count ~ sum_rain + mws + s(distance, k = 5)
## Model  5: mean_pot_count ~ sum_rain + s(distance + mws, k = 5)
## Model  6: mean_pot_count ~ sum_rain + s(distance, k = 5) + s(mws, k = 5)
## Model  7: mean_pot_count ~ s(distance, k = 5) + s(mws, k = 5)
## Model  8: mean_pot_count ~ s(distance, k = 5) + s(mws, k = 5) + s(sum_rain, 
##     k = 5)
## Model  9: mean_pot_count ~ s(distance, k = 5) + s(sum_rain, k = 5)
## Model 10: mean_pot_count ~ s(distance, k = 5) + s(sum_rain, k = 5) + mws
## Model 11: mean_pot_count ~ s(distance, k = 5) + s(mws, k = 5) + s(sum_rain, 
##     k = 5)
## Model 12: mean_pot_count ~ s(distance, k = 5, bs = "ts") + s(mws, k = 5, 
##     bs = "ts") + s(sum_rain, k = 5, bs = "ts")
## Model 13: mean_pot_count ~ s(distance, k = 5) + s(sum_rain, k = 5) + mws
##    Resid. Df Resid. Dev       Df Deviance       F               Pr(>F)    
## 1        225        182                                                   
## 2        224        180  0.99786      2.2    6.04               0.0148 *  
## 3        224        172  0.00449      7.6 4651.00       0.000000010075 ***
## 4        223        169  0.99805      2.7    7.45               0.0069 ** 
## 5        224        180 -1.00254    -10.3   28.26       0.000000252645 ***
## 6        220        145  4.00874     35.0   23.92 < 0.0000000000000002 ***
## 7        221        145 -1.00051     -0.7    1.79               0.1818    
## 8        220        145  0.94424      0.5    1.42               0.2329    
## 9        221        152 -0.95630     -6.9   19.87       0.000018302234 ***
## 10       222        149 -0.71152      2.3                                 
## 11       220        400  1.99362   -250.2                                 
## 12       221        418 -1.70062    -18.5   29.73       0.000000000075 ***
## 13       221        416  0.31889      2.2   19.15               0.0028 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

0.3.5 Visualise the Models

# generic function to print GAMs using mgcViz
p_gam <- function(x) {
   plot(x, allTerms = T) +
      l_points() +
      l_fitLine(linetype = 1)  +
      l_ciLine(linetype = 3) +
      l_ciBar() +
      l_rug()
}

print(p_gam(x = getViz(mod1)) +
         ggtitle("s(Distance)"),
      pages = 1)

print(p_gam(x = getViz(mod2)) +
         ggtitle("s(Distance) + Precipitation"),
      pages = 1)

print(p_gam(x = getViz(mod3)) +
         ggtitle("s(Distance) + Windspeed"),
      pages = 1)

print(p_gam(x = getViz(mod4)) +
         ggtitle("s(Distance) + Windspeed + Precipitation"),
      pages = 1)

print(p_gam(x = getViz(mod5)) +
         ggtitle("s(Distance + Windspeed) + Precipitation"),
      pages = 1)

print(p_gam(x = getViz(mod6)) +
         ggtitle("s(Distance) + s(Windspeed) + Precipitation"),
      pages = 1)

print(p_gam(x = getViz(mod7)) +
         ggtitle("s(Distance) + s(Windspeed)"),
      pages = 1)

print(p_gam(x = getViz(mod8)) +
         ggtitle("s(Distance) + s(Windspeed) + s(Precipitation)"),
      pages = 1)

print(p_gam(x = getViz(mod9)) +
         ggtitle("s(Distance) + s(Precipitation)"),
      pages = 1)

print(p_gam(x = getViz(mod10)) +
         ggtitle("s(Distance) + s(Precipitation) + Windspeed"),
      pages = 1)

print(p_gam(x = getViz(mod11.0)) +
   ggtitle("s(Distance) + s(Windspeed) + s(Precipitation), family = tw()"),
   pages = 1)

print(
   p_gam(x = getViz(mod11.1)) +
      ggtitle(
         "s(Distance, bs = 'ts') + s(Windspeed, bs = 'ts')\n+ s(Precipitation, bs = 'ts'), family = tw()"
      ),
   pages = 1
)

print(
   p_gam(x = getViz(mod12)) +
      ggtitle("s(Distance) + s(Precipitation) + Windspeed, family = tw()"),
   pages = 1
)

0.3.6 Check Best Model Fit

0.3.6.1 Mod11.0 - s(Distance) + s(Windspeed) + s(Precipitation), family = tw()

gam.check(mod11.0)

## 
## Method: REML   Optimizer: outer newton
## full convergence after 9 iterations.
## Gradient range [-0.00003951,0.00000204]
## (score 210.6 & scale 0.3652).
## Hessian positive definite, eigenvalue range [0.3606,1703].
## Model rank =  13 / 13 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##               k'  edf k-index             p-value    
## s(distance) 4.00 3.17    0.74 <0.0000000000000002 ***
## s(mws)      4.00 2.09    0.78 <0.0000000000000002 ***
## s(sum_rain) 4.00 2.29    0.76 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

0.4 Thoughts

This model, mod11.0, mean_pot_count ~ s(Distance) + s(Windspeed) + s(Precipitation) - family = tw(), lacks enough numbers of basis functions for two of three predictors, distance, wsp, but it’s close. Realistically we need to increase the k values for these predictors, but we’re lacking the data to do this.